Libraries

## Loading required package: lattice
## Loading required package: ggplot2
## Loading required package: xml2
## 
## Attaching package: 'tensorflow'
## The following object is masked from 'package:caret':
## 
##     train
# _____ Hyper-parameters _____

nObs <- 1000  # Number of simulated observations; must be even so that
              # exactly half of them can be labelled positive below.
nSigma <- 10  # Number of noise levels to sweep over.

# Noise standard deviations, evenly spaced from near-zero to 1.
# (The loop below overwrites its own index, so no standalone
# indexSigma assignment is needed here -- the old `indexSigma <- 10`
# was leftover debug code and has been removed.)
sigmas <- seq(from = 1e-4, to = 1, length.out = nSigma)

# _______________
# For each noise level sigma: simulate a 1-D binary classification problem,
# fit a primary keras network, pick a decision threshold from the training
# ROC curve, then fit a secondary ("meta") model that tries to confirm the
# primary model's positive calls. Per-sigma test results land in allResults.
# NOTE(review): depends on keras, ggplot2, ROCR (prediction()/performance())
# and a project helper getMetrics(); also assumes `allResults` is a list
# initialised before this loop -- none of that is visible in this chunk.
for (indexSigma in 1:nSigma) {
  sigma <- sigmas[indexSigma]
  
  # Fixed seed: the positive-class positions are identical for every sigma.
  set.seed(1998)
  labelPosition <- sample(1:nObs, nObs/2)  # indices assigned to class 1
  
  # Binary labels: half 0s, half 1s.
  y <- rep(0, nObs)
  y[labelPosition] <- 1
  
  # Single feature = label + Gaussian noise; larger sigma = more class overlap.
  set.seed(1998)
  x <- y + rnorm(length(y), mean = 0, sd = sigma)
  dd <- data.frame("observation" = 1:nObs, 
                   "Feature" = x, 
                   "Label" = as.character(y))
  
  # Scatter plot of the simulated feature, coloured by class.
  print(ggplot(dd, aes(x = observation, y = Feature, col = Label)) +
          geom_point() + xlab(element_blank()) + 
          ggtitle(paste("Features M1 - Sigma = ", sigma)))
  
  # Split: 80% in-sample, of which 80% train / 20% validation; the
  # remaining 20% of all observations form the held-out test set.
  set.seed(20)  # Do not use 1998!
  inSampleInd <- sample(length(x), ceiling(0.8*nObs))
  trnInd <- sample(length(inSampleInd), ceiling(0.8*length(inSampleInd)))
  
  xinSample <- x[inSampleInd]
  yinSample <- y[inSampleInd]
  
  xTrn <- xinSample[trnInd]
  yTrn <- yinSample[trnInd]
  
  xVal <- xinSample[-trnInd]
  yVal <- yinSample[-trnInd]
  
  xTst <- x[-inSampleInd]
  yTst <- y[-inSampleInd]
  
  # Primary model (M1): 1 input -> dense(20) + leaky ReLU -> sigmoid output.
  # NOTE(review): set.seed() seeds R's RNG only, not TensorFlow; weight
  # initialisation may still vary between runs -- confirm whether
  # tensorflow::set_random_seed() is wanted here.
  set.seed(1998)
  primaryModel <- keras_model_sequential() %>%
    layer_flatten(input_shape = 1) %>%
    layer_dense(units = 20) %>%
    layer_activation_leaky_relu() %>%
    layer_dense(units = 1, activation = "sigmoid")
  
  # Input format:
  # x <-> Matrix
  # y <-> vector
  
  primaryModel %>%
    compile(
      optimizer = "rmsprop",
      loss = "binary_crossentropy",
      metrics = c("Recall", "Precision")
    )
  
  history <- primaryModel %>% fit(
    x = xTrn, 
    y = yTrn,
    epochs = 50,
    batch_size = 256,
    validation_data = list(xVal, yVal)
    # validation_split = 0.2
  )
    
  # Raw sigmoid scores in [0, 1] for all three partitions.
  yHatTrn <- primaryModel %>% predict(xTrn)
  yHatVal <- primaryModel %>% predict(xVal)
  yHatTst <- primaryModel %>% predict(xTst)
  
  # _____ Choosing a threshold _____
  # ROC curve of the training scores via ROCR.
  df <- data.frame("predictions" = yHatTrn,
                   "labels" = yTrn)
  pred <- prediction(df$predictions, df$labels)
  perf <- performance(pred, "tpr", "fpr")
  par(pty = "s")  # square plotting region
  plot(
    perf,
    col = "grey40",
    lwd = 3,
    main = paste("ROC M1 (Train) - Sigma = ", sigma)
  )
  # Line with a: intercept, b: slope, lty = line type
  
  abline(a = 0, b = 1, lty = "dotted")  # chance diagonal
  # _______________
  
  # Evaluate 11 candidate thresholds (0, 0.1, ..., 1) on the training set.
  thresholds <- seq(from = 0, to = 1, length.out = 11)
  pointsROC <- matrix(rep(NA, 2*length(thresholds)),
                      ncol = 2, nrow = length(thresholds))
  
  # Scan from the highest threshold downward; indGood records the FIRST
  # (i.e. largest) threshold whose training TPR exceeds 0.8.
  indGood <- NA
  for (i in length(thresholds):1) {
    th <- thresholds[i]
    
    # Confusion-matrix counts at threshold th.
    TP <- sum(yHatTrn > th & yTrn == 1)
    FN <- sum(yHatTrn <= th & yTrn == 1)
    FP <- sum(yHatTrn > th & yTrn == 0)
    TN <- sum(yHatTrn <= th & yTrn == 0)
    
    TPR <- TP/(TP + FN)
    FPR <- FP/(FP + TN)
    
    if (TPR > 0.8 & is.na(indGood)) indGood <- i
    pointsROC[i, ] <- c(FPR, TPR)
    
    points(FPR, TPR, pch = 19, col = "blue")
  }
  
  # _____ Manual adjustment _____
  # NOTE(review): if no threshold achieves TPR > 0.8, indGood stays NA and
  # the next two lines fail -- consider a fallback threshold.
  th <- thresholds[indGood]
  points(pointsROC[indGood, 1], pointsROC[indGood, 2], pch = 19, col = "red")
  
  # Binarise all scores with the chosen threshold (logical TRUE/FALSE).
  yHatTrn <- yHatTrn > th
  yHatVal <- yHatVal > th
  yHatTst <- yHatTst > th
  
  # prediction_for_table <- predict(rf_classifier,validation1[,-5])
  # table(observed=validation1[,5],predicted=prediction_for_table)
  # _______________
  
  # _____ Metrics _____
  # _____ Train _____
  callMetrics <- getMetrics(actual = yTrn, predicted = yHatTrn)
  
  precisionM1Trn <- callMetrics$precision
  recallM1Trn <- callMetrics$recall
  f1ScoreM1Trn <- callMetrics$f1Score
  # _______________
  
  # _____ Validation _____
  callMetrics <- getMetrics(actual = yVal, predicted = yHatVal)
  
  precisionM1Val <- callMetrics$precision
  recallM1Val <- callMetrics$recall
  f1ScoreM1Val <- callMetrics$f1Score
  # _______________
  
  # _____ Test _____
  callMetrics <- getMetrics(actual = yTst, predicted = yHatTst)
  
  precisionM1Tst <- callMetrics$precision
  recallM1Tst <- callMetrics$recall
  f1ScoreM1Tst <- callMetrics$f1Score
  # _______________
  
  # _____ Secondary Model _____
  # Meta-model inputs: the primary's thresholded prediction alongside the
  # raw feature. Meta-label: TRUE only where the primary prediction is a
  # true positive (predicted 1 AND actually 1).
  
  xM2Trn <- cbind(yHatTrn, xTrn)
  yM2Trn <- (yHatTrn == 1) & (yTrn == 1)
  
  xM2Val <- cbind(yHatVal, xVal)
  yM2Val <- (yHatVal == 1) & (yVal == 1)
  
  xM2Tst <- cbind(yHatTst, xTst)
  yM2Tst <- (yHatTst == 1) & (yTst == 1)
  
  # Secondary model (M2): 2 inputs -> dense(25) + leaky ReLU -> sigmoid.
  set.seed(1998)
  secondaryModel <- keras_model_sequential() %>%
    layer_flatten(input_shape = ncol(xM2Trn)) %>%
    layer_dense(units = 25) %>%
    layer_activation_leaky_relu() %>%
    layer_dense(units = 1, activation = "sigmoid")
  
  # Input format:
  # x <-> Matrix
  # y <-> vector
  
  secondaryModel %>% compile(
    optimizer = "rmsprop",
    loss = "binary_crossentropy",
    metrics = c("Recall", "Precision")
  )
  
  history <- secondaryModel %>% fit(
    x = xM2Trn, 
    y = yM2Trn,
    epochs = 40,
    batch_size = 256,
    validation_data = list(xM2Val, yM2Val)
  )
  
  # Hard 0/1 class predictions from the secondary model.
  # NOTE(review): predict_classes() was removed in keras/TF >= 2.6; newer
  # setups need predict() followed by an explicit cutoff comparison.
  yHatM2Trn <- secondaryModel %>% predict_classes(xM2Trn)
  yHatM2Val <- secondaryModel %>% predict_classes(xM2Val)
  yHatM2Tst <- secondaryModel %>% predict_classes(xM2Tst)
  
  # Combined meta prediction: positive only when BOTH models agree on 1.
  yHatMMTrn <- (yHatTrn == 1) & (yHatM2Trn == 1)
  yHatMMVal <- (yHatVal == 1) & (yHatM2Val == 1)
  yHatMMTst <- (yHatTst == 1) & (yHatM2Tst == 1)
  
  # _____ Train _____
  callMetrics <- getMetrics(actual = yTrn, predicted = yHatMMTrn)
  
  precisionMMTrn <- callMetrics$precision
  recallMMTrn <- callMetrics$recall
  f1ScoreMMTrn <- callMetrics$f1Score
  
  # Side-by-side comparison table, primary vs meta model (training set).
  resultsTrn <- matrix(c(precisionM1Trn, precisionMMTrn,
                         recallM1Trn, recallMMTrn,
                         f1ScoreM1Trn, f1ScoreMMTrn), 
                       nrow = 3, ncol = 2, byrow = TRUE)
  rownames(resultsTrn) <- c("Precision", "Recall", "F1-Score")
  colnames(resultsTrn) <- c("Primary Model", "Meta Model")
  # _______________
  
  # _____ Validation _____
  callMetrics <- getMetrics(actual = yVal, predicted = yHatMMVal)
  
  precisionMMVal <- callMetrics$precision
  recallMMVal <- callMetrics$recall
  f1ScoreMMVal <- callMetrics$f1Score
  
  # Comparison table for the validation set.
  resultsVal <- matrix(c(precisionM1Val, precisionMMVal,
                         recallM1Val, recallMMVal,
                         f1ScoreM1Val, f1ScoreMMVal), 
                       nrow = 3, ncol = 2, byrow = TRUE)
  rownames(resultsVal) <- c("Precision", "Recall", "F1-Score")
  colnames(resultsVal) <- c("Primary Model", "Meta Model")
  # _______________
  
  # _____ Test _____
  callMetrics <- getMetrics(actual = yTst, predicted = yHatMMTst)
  
  precisionMMTst <- callMetrics$precision
  recallMMTst <- callMetrics$recall
  f1ScoreMMTst <- callMetrics$f1Score
  
  # Comparison table for the held-out test set.
  resultsTst <- matrix(c(precisionM1Tst, precisionMMTst,
                         recallM1Tst, recallMMTst,
                         f1ScoreM1Tst, f1ScoreMMTst), 
                       nrow = 3, ncol = 2, byrow = TRUE)
  rownames(resultsTst) <- c("Precision", "Recall", "F1-Score")
  colnames(resultsTst) <- c("Primary Model", "Meta Model")
  # _______________
  
  # Only the test-set table is kept across sigma iterations.
  allResults[[indexSigma]] <- resultsTst
  
  print(resultsTrn)
  print(resultsVal)
  print(resultsTst)
}

##           Primary Model Meta Model
## Precision             1          1
## Recall                1          1
## F1-Score              1          1
##           Primary Model Meta Model
## Precision             1          1
## Recall                1          1
## F1-Score              1          1
##           Primary Model Meta Model
## Precision             1          1
## Recall                1          1
## F1-Score              1          1

##           Primary Model Meta Model
## Precision     1.0000000  1.0000000
## Recall        0.9969231  0.9969231
## F1-Score      0.9984592  0.9984592
##           Primary Model Meta Model
## Precision     1.0000000  1.0000000
## Recall        0.9875000  0.9875000
## F1-Score      0.9937107  0.9937107
##           Primary Model Meta Model
## Precision             1          1
## Recall                1          1
## F1-Score              1          1

##           Primary Model Meta Model
## Precision     0.9966555  0.9966555
## Recall        0.9169231  0.9169231
## F1-Score      0.9551282  0.9551282
##           Primary Model Meta Model
## Precision     1.0000000  1.0000000
## Recall        0.8625000  0.8625000
## F1-Score      0.9261745  0.9261745
##           Primary Model Meta Model
## Precision     1.0000000  1.0000000
## Recall        0.8842105  0.8842105
## F1-Score      0.9385475  0.9385475

##           Primary Model Meta Model
## Precision     0.9409938  0.9409938
## Recall        0.9323077  0.9323077
## F1-Score      0.9366306  0.9366306
##           Primary Model Meta Model
## Precision     0.9012346  0.9012346
## Recall        0.9125000  0.9125000
## F1-Score      0.9068323  0.9068323
##           Primary Model Meta Model
## Precision     0.9456522  0.9456522
## Recall        0.9157895  0.9157895
## F1-Score      0.9304813  0.9304813

##           Primary Model Meta Model
## Precision     0.8597015  0.8597015
## Recall        0.8861538  0.8861538
## F1-Score      0.8727273  0.8727273
##           Primary Model Meta Model
## Precision     0.8192771  0.8192771
## Recall        0.8500000  0.8500000
## F1-Score      0.8343558  0.8343558
##           Primary Model Meta Model
## Precision     0.9111111  0.9111111
## Recall        0.8631579  0.8631579
## F1-Score      0.8864865  0.8864865

##           Primary Model Meta Model
## Precision     0.7500000  0.7500000
## Recall        0.9230769  0.9230769
## F1-Score      0.8275862  0.8275862
##           Primary Model Meta Model
## Precision     0.7000000  0.7000000
## Recall        0.8750000  0.8750000
## F1-Score      0.7777778  0.7777778
##           Primary Model Meta Model
## Precision     0.7350427  0.7350427
## Recall        0.9052632  0.9052632
## F1-Score      0.8113208  0.8113208

##           Primary Model Meta Model
## Precision     0.7263682  0.7263682
## Recall        0.8984615  0.8984615
## F1-Score      0.8033012  0.8033012
##           Primary Model Meta Model
## Precision     0.6764706  0.6764706
## Recall        0.8625000  0.8625000
## F1-Score      0.7582418  0.7582418
##           Primary Model Meta Model
## Precision     0.6747967  0.6747967
## Recall        0.8736842  0.8736842
## F1-Score      0.7614679  0.7614679

##           Primary Model Meta Model
## Precision     0.7020202  0.7005076
## Recall        0.8553846  0.8492308
## F1-Score      0.7711512  0.7677330
##           Primary Model Meta Model
## Precision     0.6666667  0.6666667
## Recall        0.8500000  0.8500000
## F1-Score      0.7472527  0.7472527
##           Primary Model Meta Model
## Precision     0.6585366  0.6585366
## Recall        0.8526316  0.8526316
## F1-Score      0.7431193  0.7431193

##           Primary Model Meta Model
## Precision     0.6620690  0.6966292
## Recall        0.8861538  0.7630769
## F1-Score      0.7578947  0.7283407
##           Primary Model Meta Model
## Precision     0.6071429  0.6597938
## Recall        0.8500000  0.8000000
## F1-Score      0.7083333  0.7231638
##           Primary Model Meta Model
## Precision     0.5886525  0.6752137
## Recall        0.8736842  0.8315789
## F1-Score      0.7033898  0.7452830

##           Primary Model Meta Model
## Precision     0.6343612  0.6871345
## Recall        0.8861538  0.7230769
## F1-Score      0.7394095  0.7046477
##           Primary Model Meta Model
## Precision     0.5964912  0.6451613
## Recall        0.8500000  0.7500000
## F1-Score      0.7010309  0.6936416
##           Primary Model Meta Model
## Precision     0.5845070  0.6725664
## Recall        0.8736842  0.8000000
## F1-Score      0.7004219  0.7307692